# -*- coding: utf-8 -*-
import scrapy
from urllib.parse import urlencode
from scrapy import Spider,Request
import json
from images360.items import ImageItem

class ImagesSpider(scrapy.Spider):
    """Spider that pages through the images.so.com ``zj`` JSON endpoint.

    ``start_requests`` generates one request per page (the page count comes
    from the ``MAX_PAGE`` setting) and ``parse`` turns every entry of the
    returned ``list`` array into an ``ImageItem``.
    """

    name = 'images'
    allowed_domains = ['images.so.com']
    # Not used for scheduling: ``start_requests`` below is overridden and
    # builds the request URLs itself.
    start_urls = ['http://images.so.com/']

    # Parse callback.
    def parse(self, response):
        """Yield one ``ImageItem`` per entry in the response's ``list`` array.

        The response body is decoded as JSON. A missing or null ``list`` key
        is treated as an empty page instead of raising ``TypeError``.
        """
        result = json.loads(response.text)
        # ``or []`` covers both an absent key and an explicit JSON null.
        for image in result.get('list') or []:
            item = ImageItem()
            item['id'] = image.get('imageid')
            item['url'] = image.get('qhimg_url')
            item['title'] = image.get('group_title')
            item['thumb'] = image.get('qhimg_thumb_url')
            yield item

    # Build the initial list of URLs to crawl; these are the first requests
    # the spider hands to the engine/scheduler.
    def start_requests(self):
        """Yield one request per page, for pages ``1..MAX_PAGE`` inclusive.

        ``MAX_PAGE`` is read with ``getint`` so that a value supplied as a
        string (for example via ``-s MAX_PAGE=5``) works, and an unset value
        no longer fails with ``TypeError`` on ``None + 1``.
        """
        max_page = self.settings.getint('MAX_PAGE', 0)
        if max_page < 1:
            self.logger.warning(
                'MAX_PAGE is unset or < 1 (got %r); no requests generated',
                self.settings.get('MAX_PAGE'),
            )
            return

        base_url = 'http://images.so.com/zj?'
        for page in range(1, max_page + 1):
            # NOTE(review): the offset starts at 30, so ``sn=0`` is never
            # requested -- confirm that skipping it is intended.
            # A fresh dict per request avoids mutating shared state while
            # keeping the same parameter order in the encoded URL.
            params = {
                'ch': 'photography',
                'listtype': 'new',
                'temp': 1,
                'sn': page * 30,
            }
            yield Request(base_url + urlencode(params), self.parse)